/**
 * 
 */
package edu.umd.clip.lm.programs;

import java.io.*;
import java.nio.channels.*;
import edu.berkeley.nlp.util.*;

import edu.umd.clip.jobs.*;
import edu.umd.clip.lm.model.*;
import edu.umd.clip.lm.model.data.*;
import edu.umd.clip.lm.ngram.*;
import edu.umd.clip.lm.util.IO;

/**
 * Command-line program that deserializes one or more {@link NgramModel}
 * forests, runs a sampler over them and writes the resulting training data
 * either to a file or to standard output.
 *
 * @author Denis Filimonov <den@cs.umd.edu>
 *
 */
public class NgramSampler {
	/** Value of {@code -sampler} that selects {@link TrainingDataSampler}. */
	private static final String SAMPLER_DEFAULT = "default";
	/** Value of {@code -sampler} that selects {@link TrainingDataSequenceSampler}. */
	private static final String SAMPLER_SEQUENCE = "sequence";

	/**
	 * Command-line options; the fields are populated by {@link OptionParser}
	 * from the {@link Option} annotations.
	 */
	public static class Options {
		@Option(name = "-config", required = true, usage = "XML config file")
		public String config;
		@Option(name = "-forest-files", required = true, usage = "comma-separated list of ngram forest files in low-to-high order")
		public String lmFiles;
		@Option(name = "-jobs", usage = "number of concurrent jobs (default: 1)")
		public int jobs = 1;
		@Option(name = "-debug", usage = "decoder debug level (default: 0)")
		public int debug = 0;
		@Option(name = "-output", required = false, usage = "Output training data (default: stdout)")
		public String output;
		@Option(name = "-samples", usage = "number of samples")
		public long samples;
		@Option(name = "-sampler", usage = "type of sampler: (default|sequence)")
		public String sampler = "default";

	}

	/**
	 * Program entry point: parses the options, initializes the experiment and
	 * the job manager, loads the forest files concurrently, then samples
	 * {@code -samples} items into the output.
	 * <p>
	 * The sampler type and (for the {@code sequence} sampler) the number of
	 * forest files are validated before anything is loaded; an invalid
	 * combination prints a message to stderr and terminates with exit status 1.
	 *
	 * @param args command-line arguments, see {@link Options}
	 * @throws IOException if a forest file cannot be read or the output cannot be written
	 * @throws ClassNotFoundException if a forest file references a class that is not available
	 */
	public static void main(String[] args) throws IOException, ClassNotFoundException {
		OptionParser optParser = new OptionParser(Options.class);
		final Options opts = (Options) optParser.parse(args, true);

		final String[] lmStr = opts.lmFiles.split(",");

		// Reject bad invocations up front, before spending time on loading
		// models and before touching (and possibly truncating) the output file.
		final boolean useSequenceSampler = SAMPLER_SEQUENCE.equals(opts.sampler);
		if (!useSequenceSampler && !SAMPLER_DEFAULT.equals(opts.sampler)) {
			System.err.printf("bad sampler type: %s\n", opts.sampler);
			System.exit(1);
		}
		// This used to be an assert, which is a no-op unless the JVM is started with -ea.
		if (useSequenceSampler && lmStr.length != 2) {
			System.err.printf("the sequence sampler requires exactly 2 forest files, got %d\n", lmStr.length);
			System.exit(1);
		}

		Experiment.initialize(opts.config);
		Experiment experiment = Experiment.getInstance();

		experiment.buildPrefixes();
		experiment.buildWordPrefixes();

		JobManager.initialize(opts.jobs);
		JobManager manager = JobManager.getInstance();
		Thread thread = new Thread(manager, "Job Manager");
		// daemon thread: it must not keep the JVM alive once main() returns
		thread.setDaemon(true);
		thread.start();

		System.err.println("Loading models...");
		final NgramModel[] models = loadModels(manager, lmStr);

		FileChannel output = openOutput(opts.output);
		try {
			TrainingDataWriter writer = new OnDiskTrainingDataWriter(output);
			WritableTrainingData data = writer.createData();

			System.err.println("Starting sampling");

			data.setContextSize((byte) (experiment.getLM().getOrder() - 1));
			data.start();

			if (useSequenceSampler) {
				TrainingDataSequenceSampler sampler = new TrainingDataSequenceSampler(models[0], models[1]);
				sampler.doSampling(opts.samples, data);
			} else {
				TrainingDataSampler sampler = new TrainingDataSampler(models);
				sampler.doSampling(opts.samples, data);
			}
			data.finish();
		} finally {
			// release the channel even when sampling fails half-way
			output.close();
		}
	}

	/**
	 * Deserializes every forest file as a separate job and waits for all of
	 * them to complete.
	 *
	 * @param manager job manager used to schedule the loading jobs
	 * @param paths forest files; the returned array uses the same order
	 * @return the loaded models, none of them {@code null}
	 * @throws IOException if any file could not be read or produced no model
	 * @throws ClassNotFoundException if a serialized class could not be resolved
	 */
	private static NgramModel[] loadModels(JobManager manager, final String[] paths)
			throws IOException, ClassNotFoundException {
		final NgramModel[] models = new NgramModel[paths.length];
		// One slot per job so that the jobs never write to the same element.
		final Exception[] failures = new Exception[paths.length];

		JobGroup group = manager.createJobGroup("forest loading");
		for (int i = 0; i < paths.length; ++i) {
			final int lmNum = i;
			Runnable run = new Runnable() {
				@Override
				public void run() {
					try {
						InputStream in = IO.getInputStream(paths[lmNum]);
						try {
							ObjectInputStream ois = new ObjectInputStream(in);
							models[lmNum] = (NgramModel) ois.readObject();
							System.err.printf("...loaded %s\n", models[lmNum].getName());
						} finally {
							in.close();
						}
					} catch (IOException e) {
						// remembered here, reported by the caller after join()
						failures[lmNum] = e;
					} catch (ClassNotFoundException e) {
						failures[lmNum] = e;
					}
				}
			};
			manager.addJob(group, new Job(run, "load forest"));
		}
		group.join();

		for (int i = 0; i < paths.length; ++i) {
			Exception failure = failures[i];
			if (failure instanceof ClassNotFoundException) {
				throw (ClassNotFoundException) failure;
			}
			if (failure != null) {
				throw new IOException("failed to load forest file " + paths[i], failure);
			}
			if (models[i] == null) {
				// the job ended without a model and without a recorded exception
				throw new IOException("failed to load forest file " + paths[i]);
			}
		}
		return models;
	}

	/**
	 * Opens the channel the training data is written to.
	 *
	 * @param path output file name, or {@code null} to write to standard output
	 * @return a writable channel; the caller is responsible for closing it
	 * @throws IOException if the file cannot be opened for writing
	 */
	private static FileChannel openOutput(String path) throws IOException {
		if (path == null) {
			return new FileOutputStream(FileDescriptor.out).getChannel();
		}
		return new FileOutputStream(path).getChannel();
	}

}
